* Time series of firm size

	use if inrange(age, 40, 55) using "${clean_data}/panel_physicians_clean.dta", clear

	encode(cms_spec_code), gen(cms_spec_num)
	encode(stfips), gen(stfips_num)
	
	// Regressions
	cap log close
	log using "${output}/descriptives/firm_size_time_series.txt", replace text 	
	
	gen einsize_mi=(einsize==.)
	gen einsize1=(einsize==1) if !missing(einsize)
	gen einsize1to2=(einsize<3) if !missing(einsize)
	gen einsize1to5=(einsize<6) if !missing(einsize)
	gen einsize1to28=(einsize<29) if !missing(einsize)
	
	sum einsize* if !missing(einsize) & year==2017

	reg einsize1 ib2017.year i.age female i.cms_spec_num i.stfips_num, robust
	parmest, saving("${output}/descriptives/time_series_adjusted_einsize1.dta", replace) es(N)
	gen adjusted_sample=(e(sample)==1)

	reg einsize1 ib2017.year if adjusted_sample==1
	parmest, saving("${output}/descriptives/time_series_notadjusted_einsize1.dta", replace)	es(N)
	
	reg einsize_mi ib2017.year i.age female i.cms_spec_num i.stfips_num, robust
	parmest, saving("${output}/descriptives/time_series_adjusted_einsize_missing.dta", replace) es(N)
	gen adjusted_sample_einmiss=(e(sample)==1)
	
	reg einsize_mi ib2017.year if adjusted_sample_einmiss==1
	parmest, saving("${output}/descriptives/time_series_notadjusted_einsize_missing.dta", replace)	es(N)
	
	log close
	
	// Export data 
	foreach var in einsize1 einsize_missing {
		
		use "${output}/descriptives/time_series_notadjusted_`var'.dta", clear 
		rename (estimate es_1) (estimate_notadj_`var' N_notadj_`var'_UR)
		
		merge 1:1 parm using "${output}/descriptives/time_series_adjusted_`var'.dta", keep(master match) keepusing(estimate es_1) nogen
		ren (estimate es_1) (estimate_adj_`var' N_adj_`var'_UR)
		
		keep parm estimate* N*
		
		gen mean2017_`var'= estimate_notadj_`var'[14]
		drop if parm == "_cons"
		gen notadj_`var'=estimate_notadj_`var'+mean2017_`var'
		gen adj_`var'=estimate_adj_`var'+mean2017_`var'
		
		gen year=substr(parm, 1,4)
		destring year, replace
		
		gen sample  = "ages 40-55"
		
		tempfile temp`var'
		save `temp`var'', replace
	}
	
	use `tempeinsize1' , clear
	merge 1:1 parm using `tempeinsize_missing', nogen assert(3)

	keep notadj_einsize1 adj_einsize1 notadj_einsize_missing adj_einsize_missing year N*
	drbest_docinc notadj_einsize1 adj_einsize1 notadj_einsize_missing adj_einsize_missing
	order N*, last
	export delimited using "${mypath}/intermediate_csv/desc08-time_series_einsize.csv", replace dataf delim(tab)  	

* Income versus firm size

	use if inrange(age, 40, 55) using "${clean_data}/panel_physicians_clean.dta", clear	
	replace ptotinc = ptotinc/1000
 
	// Binscatter
	count if einsize == 1
		loc N_UR_einsize1 = `r(N)'
	count if einsize != .
		loc sh_einsize1 = `N_UR_einsize1' / `r(N)'
	count if einsize == . 
		loc N_UR_einsize_missing = `r(N)'
		loc sh_einsize_missing = `N_UR_einsize_missing' / `=_N'
		
	qui su einsize, d 
	loc mean = `r(mean)'
	qui centile einsize, centile(49.99 50.01)
	qui su einsize if einsize >= `r(c_1)' & einsize <= `r(c_2)'
	assert `r(N)' > 10
	loc median = `r(mean)'
	loc N_UR_median = `r(N)'
	
	#delimit ;	
	binscatter ptotinc einsize if einsize<100, genxq(einsizebin) linetype(connect) 
			ylab(, nogrid) xlab(0(5)100) ytitle("Annual Income" "($1,000)") 
			xtitle("Size of firm") xline(`median', lc(g8) noextend)
			note("Note: Firm only defined if observe at least one W-2." 
			"Firm size only includes physicians in our sample."
			"Restricted to EINs with fewer than 100 physicians" 
			" Age: 40 to 55; full panel"
			"(average doc-weighted size: `mean'; median: `median'; share of physicians in single-person firms: `sh_einsize1' )", size(small));

	#delimit cr	

	// Export data 
	keep if einsize < 100
	gcollapse (mean) ptotinc einsize (count) N_UR = einsize, by(einsizebin)
	g mean = `mean'
	g median = `median'
	g N_UR_median = `N_UR_median'
	g sh_einsize1 = `sh_einsize1'
	g N_UR_einsize1 = `N_UR_einsize1'
	g sh_einsize_missing = `sh_einsize_missing'
	g N_UR_einsize_missing = `N_UR_einsize_missing'
	g sample  = "ages 40-55"
	drbest_docinc ptotinc einsize mean median sh_einsize1 sh_einsize_missing
	order N_UR*, last
	export delimited using "$mypath/intermediate_csv/desc08-binscatter_ptotinc_by_einsize.csv", replace dataf delim(tab)  
	
